setwd("/home/Data/Genotype/SzCausal_202211/Summary202301/")
source("../helper.R") #helper functions and library load
source("../notearsMClosses.R")
source("../utils.R")
library(readxl)
library(dplyr)
library(ggpubr)
library(reshape2)
library(ggplot2)
set.seed(101)
Read protein data
# proteomic data
synaptosome = read_xlsx("../oneDriveData/LMER-REGRESSION/RESIDUALS+DX_SYNAPTOSOME-PROTEIN-no-pH.xlsx")
phospho = read_xlsx("../oneDriveData/LMER-REGRESSION/RESIDUALS+DX_PHOSPHO-PEPTIDE-2022-11-08.xlsx")
homogenate = read_xlsx("../oneDriveData/LMER-REGRESSION/RESIDUALS+DX_HOMOGENATE-PROTEIN.xlsx")
colnames(synaptosome)[1]="X"
colnames(phospho)[1]="X"
colnames(homogenate)[1]="X"
synaptosome=as.data.frame(synaptosome)
phospho=as.data.frame(phospho)
homogenate=as.data.frame(homogenate)
Read spine density data
# spine density
spine.densL3 = read_xlsx("../oneDriveData/spine_density/spine_L3_SML_mean_medianNorm_forVas.xlsx")
spine.densL5 = read_xlsx("../oneDriveData/spine_density/spine_L5_SML_mean_medianNorm_20220604.xlsx")
Read phenotype data
# pheno
pheno = read_xlsx("../oneDriveData/metadata/All_Subject_Demographics.xlsx")
pheno = as.data.frame(pheno)
rownames(pheno) = paste0(c("S"), pheno$HU)
Read phenotype data and ID map
# mapping IDs
# phosphopeptide_to_phosphosite
phosphoMap_LMER = read.csv("../oneDriveData/LMER-REGRESSION/VAS_LMER-REGRESSION-PHOSPHO-SITE_2022-11-17.csv")
imputed_phos_mapped = readRDS("../savedRDS/imputed_phos_mapped_Vas.RDS")
# uniprot map
protMap_phos = read_xlsx("../oneDriveData/metadata/20220509_uniprot_map_legacy_phos.xlsx")
protMap_hom = read_xlsx("../oneDriveData/metadata/20220920_uniprot_map_legacy_hom.xlsx")
protMap_syn = read_xlsx("../oneDriveData/metadata/20221003_uniprot_map_legacy_syn.xlsx")
# combine spine density data
colnames(spine.densL3) = c("patient", "L3.spineS", "L3.spineM", "L3.spineL")
colnames(spine.densL5) = c("patient", "L5.spineS", "L5.spineM", "L5.spineL")
spine.dens = merge(spine.densL3, spine.densL5, by="patient")
spine.dens$patient = paste0(c("S"), spine.dens$patient)
rownames(spine.dens) = spine.dens$patient
Do average after t scale
# t-scaled spine.dens
spine.dens = tscale(t(spine.dens[,-1]))
# get averaged spine density
spine.dens = rbind(spine.dens,
(spine.dens[1,]+spine.dens[4,])/2,
(spine.dens[2,]+spine.dens[5,])/2,
(spine.dens[3,]+spine.dens[6,])/2)
rownames(spine.dens)[7:9] = c("Avg.spineS", "Avg.spineM", "Avg.spineL")
spine.dens[,1:5]
## S537 S567 S621 S659 S665
## L3.spineS 0.79071598 1.70373906 1.0379846 0.40711751 -0.35239506
## L3.spineM 0.74341847 1.21889312 -0.2756114 -0.05084451 -0.36374846
## L3.spineL 0.16157153 -0.03360054 -1.7370161 -0.39026201 -0.36374524
## L5.spineS 0.54341023 0.92139354 1.1607254 -0.18480922 0.26402584
## L5.spineM 0.12056845 0.18182508 -0.4756850 0.40036962 -0.01857248
## L5.spineL -0.35458365 -0.24950081 -0.3205690 1.93167993 0.68511677
## Avg.spineS 0.66706311 1.31256630 1.0993550 0.11115414 -0.04418461
## Avg.spineM 0.43199346 0.70035910 -0.3756482 0.17476255 -0.19116047
## Avg.spineL -0.09650606 -0.14155068 -1.0287926 0.77070896 0.16068576
Look at protein data
dim(phospho)
## [1] 8604 85
phospho[1:5,1:5]
## X S1543 S1247 S1240
## 1 [K].RLsLESEGAGEGAAASPELSALEEAFRR.[F] 0.08914847 0.06069769 -0.3763341
## 2 [R].QQFTVSSGEsPPLSAGNIYQk.[R] 0.08978403 -0.18648350 0.1182183
## 3 [R].GLYDGPVcEVSVtPk.[T] -0.25367412 -0.44800185 0.1993910
## 4 [R].TGVLLPSSPEAEVPGkLsPk.[Q] -0.18580943 0.25834698 0.4435583
## 5 [R].DVmSDETNNEEtESPSQEFVNITk.[Y] 0.06637524 -0.11895118 -0.3151305
## S659
## 1 0.1991965
## 2 0.5391897
## 3 -0.3116207
## 4 -0.1365558
## 5 0.2138412
dim(homogenate)
## [1] 6154 85
homogenate[1:5,1:5]
## X S1543 S1247 S1240 S659
## 1 A0A075B6I9 1.20958964 -0.20179116 0.2348509 -1.04283998
## 2 A0A075B767 0.32676616 0.40828776 0.3972039 -0.63299760
## 3 A0A087WSZ9 -0.02240467 0.09451659 0.1754651 -0.02535614
## 4 A0A087WW87 1.24859795 -0.08112229 -0.8601148 -0.66167317
## 5 A0A087X1C5 0.01549401 -0.04796556 -0.0296112 0.21076040
dim(synaptosome)
## [1] 4273 71
synaptosome[1:5,1:5]
## X S1524 S1542 S659 S1256
## 1 A0A087WW87 -0.07364627 0.2011440 -0.01602870 0.05803797
## 2 A0A0J9YX94 -0.07654129 0.1100552 0.01038487 0.36509269
## 3 A0AVF1 -0.01842234 0.0125747 0.17291865 -0.19032530
## 4 A0AVT1 0.28611163 0.4236436 -0.26265143 -0.36867685
## 5 A0FGR8 -0.32771033 0.3617452 0.22083443 -0.01426039
Map protein to Entry name
# map protein to gene
protMap_phos$`Entry name` = gsub("_.*", "", protMap_phos$`Entry name`)
protMap_hom$`Entry Name` = gsub("_.*", "", protMap_hom$`Entry Name`)
protMap_syn$`Entry Name` = gsub("_.*", "", protMap_syn$`Entry Name`)
Create Uniprot to Entry name map
hom = data.frame("EntryName" = protMap_hom[protMap_hom$From%in%homogenate$X,]$`Entry Name`,
"X" = protMap_hom[protMap_hom$From%in%homogenate$X,]$From)
syn = data.frame("EntryName" = protMap_syn[protMap_syn$From%in%synaptosome$X,]$`Entry Name`,
"X" = protMap_syn[protMap_syn$From%in%synaptosome$X,]$From)
Uniprot_Entry_conv = rbind(hom, syn)
Uniprot_Entry_conv = Uniprot_Entry_conv[!duplicated(Uniprot_Entry_conv), ]
head(Uniprot_Entry_conv)
## EntryName X
## 1 PLEC Q15149
## 2 DYHC1 Q14204
## 3 SPTN1 Q13813
## 4 SPTB2 Q01082
## 5 ANK2 Q01484
## 6 MACF1 Q9UPN3
Map homogenate and synaptosome IDs
iim = match(homogenate$X, Uniprot_Entry_conv$X)
sum(is.na(iim))
## [1] 1
homogenate[is.na(iim),]
## X S1543 S1247 S1240 S659 S930 S857
## 1101 P0DN79 0.1213453 -0.1068871 0.01900842 -0.2782531 -0.183471 0.2590462
## S1455 S988 S621 S1686 S1307 S10024
## 1101 0.2738417 0.09690885 -0.2400725 0.04792216 0.07524866 -0.05255184
## S1047 S933 S700 S625 S1480 S1263 S567
## 1101 0.1375628 0.2031501 -0.01041883 0.1243739 0.05355879 0.2949723 -0.1522442
## S537 S1374 S904 S686 S727 S829
## 1101 -0.1018258 -0.2401632 0.2778762 0.005830551 -0.1000433 0.2265546
## S1488 S1222 S1579 S1386 S1420 S1454
## 1101 -0.03472352 0.06512113 -0.03607005 0.02531974 0.01057712 0.02052511
## S685 S1105 S852 S722 S1086 S630 S566
## 1101 0.07248064 -0.08739716 0.115066 0.03907439 0.2112698 -0.2046825 0.1642356
## S1317 S1361 S1196 S1211 S10005 S1256
## 1101 0.02711624 0.04556534 -0.1300543 0.06370251 0.02369779 0.07975556
## S1314 S1372 S1581 S739 S1088 S587 S806
## 1101 -0.01773702 0.1784291 0.01911988 0.2730042 0.1866938 0.1962267 -0.01975071
## S665 S681 S640 S1326 S1453 S818 S917
## 1101 0.2309043 -0.1628133 0.04867391 -0.5196283 -0.0905203 -0.4174154 0.4013591
## S1391 S1506 S1384 S1712 S1099 S10023 S1067
## 1101 0.3448422 -0.0045312 0.1825578 -0.03122271 0.01367 -0.0830969 -0.08956527
## S1296 S1201 S1189 S1092 S1010 S871
## 1101 0.07860413 -0.1192308 -0.04048022 -0.04209686 0.1794047 0.2049523
## S878 S1524 S1542 S1555 S1649 S1268
## 1101 -0.009398474 -0.03464149 0.3536867 0.1820754 -0.2746762 -0.1230837
## S1230 S1284 S1188 S822 S787 S1119
## 1101 -0.1250359 0.1289821 0.2044734 0.06100962 -0.283646 -0.008278493
## S1173
## 1101 0.09637928
iim = match(synaptosome$X, Uniprot_Entry_conv$X)
sum(is.na(iim))
## [1] 1
synaptosome[is.na(iim),]
## X S1524 S1542 S659 S1256 S10005 S722
## 783 P0DN79 -0.05277195 0.2277988 -0.02293654 0.1215893 0.1946774 0.5274595
## S852 S818 S917 S1240 S1247 S1317 S1361
## 783 -0.1190994 -0.4102457 0.9094391 0.07329765 0.4754033 -0.07325104 0.1841
## S1372 S1222 S1488 S1455 S625 S700 S1307
## 783 0.2533631 0.7574052 0.05695838 -0.03196725 0.3854604 0.04219118 0.2423072
## S10024 S1230 S1268 S1026 S1555 S1649 S933
## 783 0.04874504 0.8768703 -0.3786701 0.2509153 -0.1995572 -0.08781356 0.5643208
## S1047 S1119 S1173 S546 S587 S1314 S1270
## 783 0.1431283 0.1201258 -0.2608134 0.4885759 0.2459213 0.6522106 -0.2168916
## S1579 S787 S822 S1384 S1712 S621 S988
## 783 0.1690741 -0.06982843 0.1290167 0.1520726 0.4581422 0.2114598 0.005760471
## S566 S1196 S1211 S685 S1105 S640 S681
## 783 0.9669385 -0.5352001 0.6442835 -0.06483736 0.7956093 0.8212378 -0.3953751
## S686 S1543 S10026 S537 S567 S1296 S1341
## 783 0.154504 -0.08148204 0.1499792 0.6596398 0.1020894 0.3519617 0.5240332
## S1466 S1263 S1480 S1391 S1506 S1284 S665
## 783 -0.4561494 -0.04124201 0.03518731 0.07048346 0.7399287 -0.120382 -0.3514289
## S806 S871 S878 S1189 S1255 S10020 S739
## 783 0.4980392 -0.1028951 8.869842e-05 0.5231688 0.03303334 0.9487006 -0.2180877
## S1088
## 783 0.6556996
Add protein P0DN79 to convert table
Uniprot_Entry_conv = rbind(Uniprot_Entry_conv, c("CBSL", "P0DN79"))
iim=match(homogenate$X, Uniprot_Entry_conv$X)
rownames(homogenate) = as.character(Uniprot_Entry_conv$EntryName[iim])
iim=match(synaptosome$X, Uniprot_Entry_conv$X)
rownames(synaptosome) = Uniprot_Entry_conv$EntryName[iim]
synaptosome$X = NULL
homogenate$X = NULL
Deal with phospho data
phospho$prot_site = imputed_phos_mapped$prot_site
For identical prot_site go to LMER results and pick one with min(q-val) to keep
phos_LMER = read_xlsx("../oneDriveData/LMER-REGRESSION/LMER-REGRESSION-PHOSPHO-PEPTIDE_2022-11-07.xlsx")
phos_LMER = as.data.frame(phos_LMER)
iim = match(phospho$X, phos_LMER$peptide)
phospho$qval = phos_LMER$q[iim]
tmp = phospho[!is.na(phospho$prot_site),]
tmp = tmp[order(tmp$prot_site, tmp$qval, decreasing = F),]
tmp = tmp[!duplicated(tmp$prot_site),]
phospho = tmp
rownames(phospho) = phospho$prot_site
phospho$X = NULL
phospho$prot_site = NULL
phospho$qval = NULL
Phospho site map
load("phospho_site_map.RData")
rownames(phospho) = phospho_site_abbr$rowname
Naming of phospho peptides: p + protein ID + phosphosite index
head(rownames(phospho))
## [1] "pA0FGR8" "pA0JNW5" "pA1IGU5" "pA1Z1Q3" "pA2A288" "pA2A2V5"
Look at data distribution
boxplot(homogenate)
boxplot(synaptosome)
boxplot(phospho)
boxplot(t(homogenate[1:50,]))
boxplot(t(synaptosome[1:50,]))
boxplot(t(phospho[1:50,]))
T-scale
# tscale
homogenate = tscale(homogenate)
phospho = tscale(phospho)
synaptosome = tscale(synaptosome)
Look at data distribution after t-scale
boxplot(t(homogenate[1:50,]))
boxplot(t(synaptosome[1:50,]))
boxplot(t(phospho[1:50,]))
rownames(synaptosome) = paste0(rownames(synaptosome), c('.syn'))
comCol = commonColumns(phospho, synaptosome)
intersectProt = rbind(phospho[,comCol], homogenate[,comCol], synaptosome[,comCol])
comCol = commonColumns(intersectProt, spine.dens)
intersectProt = rbind(phospho[,comCol], homogenate[,comCol], synaptosome[,comCol])
boxplot(intersectProt)
boxplot(t(intersectProt[c(1:20, 8000:8020, 17000:17020),]))
saveRDS(homogenate, "savedRDS/homogenate_LMERtscaled.RDS")
saveRDS(synaptosome, "savedRDS/synaptosome_LMERtscaled.RDS")
saveRDS(phospho, "savedRDS/phospho_LMERtscaled.RDS")
saveRDS(intersectProt, "savedRDS/intersectProt_LMERtscaled.RDS")
protein = intersectProt
spine.dens = spine.dens[,colnames(intersectProt)]
pheno = pheno[colnames(protein),]
multi_Avg=data.frame("patient"=colnames(spine.dens))
multi_L3=data.frame("patient"=colnames(spine.dens))
multi_L5=data.frame("patient"=colnames(spine.dens))
Run cv.glmnet (with keep=T) 100 times and average all the minimum cm results.
for (i in 1:100) {
# print(paste0("working on: ", i, "..."))
gres = cv.glmnet(y=spine.dens[7,], x=t(protein), family="gaussian", type.measure="mse", alpha=0.9, keep = T)
multi_Avg=cbind(multi_Avg, gres$fit.preval[,gres$cvm == min(gres$cvm)])
gres = cv.glmnet(y=spine.dens[1,], x=t(protein), family="gaussian", type.measure="mse", alpha=0.9, keep = T)
multi_L3=cbind(multi_L3, c(gres$fit.preval[,gres$cvm == min(gres$cvm)]))
gres = cv.glmnet(y=spine.dens[4,], x=t(protein), family="gaussian", type.measure="mse", alpha=0.9, keep = T)
multi_L5=cbind(multi_L5, c(gres$fit.preval[,gres$cvm == min(gres$cvm)]))
}
Use mean from 100 cv.glmnet results with min(cm) to get surrogate prediction
surrogateAvg = apply(multi_Avg[,-1], 1, mean)
surrogateL3 = apply(multi_L3[,-1], 1, mean)
surrogateL5 = apply(multi_L5[,-1], 1, mean)
spinePredict = rbind("surrogateAvg" = surrogateAvg, "surrogateL3" = surrogateL3, "surrogateL5" = surrogateL5)
PredictScatter <- function(data, ylim = c(-0.8,0.8), xlim = c(-2.3, 2), title=NULL, xlab=NULL, ylab=NULL,...) {
p = ggscatter(data, "real", "surrogate_prediction",
ylim=ylim, xlim=xlim,
add = "reg.line", conf.int = TRUE, # Add confidence interval
cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
cor.coeff.args = list(method = "pearson", label.x = -2.3, label.y = 0.6, label.sep = "\n", size=3),
title = title,
xlab = xlab,
ylab = ylab)
p
}
l3_l5 = data.frame("surrogate_prediction" = spine.dens[1,], "real" = spine.dens[4,])
p = PredictScatter(l3_l5, ylim = c(-2,2),
title = "L3 vs L5",
xlab = "L5", ylab = "L3")
p
avg = data.frame("surrogate_prediction" = surrogateAvg, "real"=spine.dens[7,])
l3 = data.frame("surrogate_prediction" = surrogateL3, "real"=spine.dens[1,])
l5 = data.frame("surrogate_prediction" = surrogateL5, "real"=spine.dens[4,])
l3p_l5 = data.frame("surrogate_prediction" = surrogateL3, "real"=spine.dens[4,])
l5p_l3 = data.frame("surrogate_prediction" = surrogateL5, "real"=spine.dens[1,])
l3p_l5p = data.frame("surrogate_prediction" = surrogateL3, "real"=surrogateL5)
p1 = PredictScatter(avg, title = "Avg.predict vs Avg.real",
xlab = "Avg.real", ylab = "Avg.surrogate_prediction")
p2 = PredictScatter(l3, title = "L3.predict vs L3.real",
xlab = "L3.real", ylab = "L3.surrogate_prediction")
p3 = PredictScatter(l5, title = "L5.predict vs L5.real",
xlab = "L5.real", ylab = "L5.surrogate_prediction")
p4 = PredictScatter(l3p_l5, title = "L3.predict vs L5.real",
xlab = "L5.real", ylab = "L3.surrogate_prediction")
p5 = PredictScatter(l5p_l3, title = "L5.predict vs L3.real",
xlab = "L3.real", ylab = "L5.surrogate_prediction")
p6 = PredictScatter(l3p_l5p, title = "L3.predict vs L5.predict",
xlab = "L5.surrogate_prediction", ylab = "L3.surrogate_prediction")
Predict vs real spine density
ggarrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2)
tscale on predicted spine density
spineData = rbind(spine.dens, tscale(spinePredict))
df = melt(as.matrix(spineData))
colnames(df) = c("type", "subj", "value")
df$pheno = pheno[as.character(df$subj),]$Group #since the subject id is actually a number if may get autoconverted
p = ggboxplot(df, x = "pheno", y = "value", facet.by = "type")+
stat_compare_means(method = "t.test", label.x = 1, label.y = 4) +
stat_compare_means(aes(label = ..p.signif..), method = "t.test", label.y = 3, label.x = 1.4)
facet(p,facet.by = "type", ncol=3)
correlation between spine.dens and diagnosis
cor.result = data.frame()
for(i in 1:nrow(spineData)) {
cor=cor.test(as.numeric(spineData[i,]), as.numeric(pheno[colnames(spineData),]$Group=="C"))
tmp=t(melt(unlist(cor)))
cor.result=rbind(cor.result, tmp)
}
rownames(cor.result)=rownames(spineData)
corR=as.data.frame(cor.result)
corR$statistic.t=as.numeric(corR$statistic.t)
corR$estimate.cor=as.numeric(corR$estimate.cor)
corR$p.value=as.numeric(corR$p.value)
ggscatter(corR, "estimate.cor", "statistic.t", label = rownames(corR), repel = T, title = "Spine density and disease diagnosis")
# create LVs using the protein dataset
prot.data=matrix(unlist(protein),ncol = ncol(protein))
rownames(prot.data) = rownames(protein)
colnames(prot.data) = colnames(protein)
res = simpleDecomp(tscale(prot.data), k=20, adaptive.frac = 0.01)
## ****
## Computing SVD
## [1] "L1 is set to 39.1454705695919"
## [1] "L2 is set to 117.436411708776"
## Init
## converged at iteration 67
dim(res$Z)
## [1] 17143 20
#we should see some with very low variance (drop out)
boxplot(t(res$B), las=2)
plot(sort(bvar<-apply(res$B,1,var)), log="y")
#1e-4 is a good cutoff
#look at the Z, how does the pattern in Z depend on adaptive.frac?
p = pheatmap(getTopEachColumn(res$Z[, bvar>1e-4],top = 20), main="cutoff bvar>0.01")
SzP = rownames(pheno[pheno$Group=='Sz',])
Ctrl = rownames(pheno[pheno$Group=='C',])
limma
limres = fitLimma(protein, as.factor(pheno$Group), "Sz-C")
topTable(limres, n=30)
## logFC AveExpr t P.Value adj.P.Val B
## 1433T.syn 1.610003 -0.024210535 6.421753 1.449222e-10 8.064091e-07 13.56665
## KTN1.syn 1.602663 -0.013868570 6.393741 1.739276e-10 8.064091e-07 13.39442
## FN3K.syn 1.595121 -0.003397722 6.364181 2.106803e-10 8.064091e-07 13.21349
## SNX2.syn 1.590265 -0.042487072 6.343754 2.404065e-10 8.064091e-07 13.08893
## TRIM2.syn 1.585402 -0.020206905 6.322845 2.750686e-10 8.064091e-07 12.96185
## COF1.syn 1.579901 -0.040310663 6.299658 3.192230e-10 8.064091e-07 12.82140
## 1433F.syn 1.569705 -0.008944295 6.261592 4.071452e-10 8.064091e-07 12.59194
## PTPA.syn 1.565659 -0.049253269 6.244392 4.542476e-10 8.064091e-07 12.48871
## XPO7.syn 1.562210 -0.001096893 6.233101 4.880178e-10 8.064091e-07 12.42109
## 1433E.syn 1.558736 -0.046139704 6.217201 5.397609e-10 8.064091e-07 12.32608
## CYRIA.syn 1.556076 -0.025105927 6.208205 5.713676e-10 8.064091e-07 12.27243
## pP61764.3 -1.555988 0.023285102 -6.204376 5.853632e-10 8.064091e-07 12.24962
## GDS1.syn 1.553987 -0.012940026 6.197453 6.115218e-10 8.064091e-07 12.20841
## PCY2.syn 1.547864 -0.044222959 6.173350 7.117982e-10 8.715969e-07 12.06529
## HNRPD.syn 1.544260 -0.010018544 6.158077 7.834622e-10 8.737768e-07 11.97489
## MARE3.syn 1.541914 -0.006965985 6.151682 8.155182e-10 8.737768e-07 11.93710
## PURA2.syn 1.533612 -0.064650863 6.115419 1.022975e-09 9.995281e-07 11.72356
## 1433G.syn 1.531810 -0.030298143 6.110442 1.055193e-09 9.995281e-07 11.69435
## 1433Z.syn 1.530272 -0.037679811 6.102625 1.107801e-09 9.995281e-07 11.64852
## CSN5.syn 1.516221 -0.012447993 6.045193 1.580994e-09 1.355149e-06 11.31356
## FAF1.syn 1.513511 -0.072829802 6.036399 1.669015e-09 1.362472e-06 11.26255
## GSLG1.syn -1.510924 0.026154310 -6.026970 1.768706e-09 1.378224e-06 11.20794
## PPME1.syn 1.505743 -0.045685001 6.005468 2.018229e-09 1.421423e-06 11.08371
## HNRPK.syn 1.504202 0.005252015 6.002862 2.050706e-09 1.421423e-06 11.06868
## pP55327.1 -1.505663 0.099833704 -6.001105 2.072891e-09 1.421423e-06 11.05856
## HGS.syn 1.502475 -0.026823754 5.991686 2.195870e-09 1.447838e-06 11.00431
## GUAA.syn 1.500551 -0.051628875 5.984622 2.292722e-09 1.455709e-06 10.96370
## SNX6.syn 1.498201 -0.033460567 5.974907 2.432777e-09 1.489467e-06 10.90790
## DP13A.syn 1.495068 -0.045357310 5.964633 2.589925e-09 1.531003e-06 10.84900
## DCTN4.syn 1.490574 -0.030042992 5.943971 2.936520e-09 1.649118e-06 10.73083
run multi-elastic nets
resGN = runGnetMulti(t(protein), y=pheno$Group=="Sz", nfolds=3, bootstrap = T, alpha = 0.9)
## family is set to binomial
choose featrues to label
# choose features to label
toLabel = rownames(protein)[which(resGN$features>75 | abs(limres$t)>6)]
length(toLabel) #do not plot more than 30 labels, they will be subsampled
## [1] 33
scatter plot for feature selection
myggscatter(resGN$features, limres$t, line = F, label = rownames(protein),
xlab="Number of models chosen in 100 elastic net regressions",
ylab="t-statistic value",
label.select = toLabel,
title="Focal variable: diagnosis (Sz-Control)",
subtitle = "models > 60 or abs(t) > 6")
Limma has no difference compared with spinePredict excluded
data = rbind(protein, tscale(spinePredict))
limres.predictS = fitLimma(data, as.factor(pheno$Group), "Sz-C")
topTable(limres.predictS, n=30)
## logFC AveExpr t P.Value adj.P.Val B
## 1433T.syn 1.610003 -0.024210535 6.422513 1.446083e-10 8.049500e-07 13.56879
## KTN1.syn 1.602663 -0.013868570 6.394546 1.734950e-10 8.049500e-07 13.39686
## FN3K.syn 1.595121 -0.003397722 6.365003 2.101279e-10 8.049500e-07 13.21605
## SNX2.syn 1.590265 -0.042487072 6.344533 2.398388e-10 8.049500e-07 13.09125
## TRIM2.syn 1.585402 -0.020206905 6.323562 2.745217e-10 8.049500e-07 12.96381
## COF1.syn 1.579901 -0.040310663 6.300325 3.186849e-10 8.049500e-07 12.82308
## 1433F.syn 1.569705 -0.008944295 6.262355 4.061960e-10 8.049500e-07 12.59423
## PTPA.syn 1.565659 -0.049253269 6.245112 4.533061e-10 8.049500e-07 12.49075
## XPO7.syn 1.562210 -0.001096893 6.233915 4.867119e-10 8.049500e-07 12.42371
## 1433E.syn 1.558736 -0.046139704 6.217934 5.385851e-10 8.049500e-07 12.32823
## CYRIA.syn 1.556076 -0.025105927 6.208999 5.698980e-10 8.049500e-07 12.27495
## pP61764.3 -1.555988 0.023285102 -6.205035 5.843531e-10 8.049500e-07 12.25134
## GDS1.syn 1.553987 -0.012940026 6.198152 6.103085e-10 8.049500e-07 12.21037
## PCY2.syn 1.547864 -0.044222959 6.174059 7.103291e-10 8.699501e-07 12.06733
## HNRPD.syn 1.544260 -0.010018544 6.158749 7.820146e-10 8.716890e-07 11.97672
## MARE3.syn 1.541914 -0.006965985 6.152468 8.134272e-10 8.716890e-07 11.93961
## PURA2.syn 1.533612 -0.064650863 6.116079 1.021127e-09 9.977682e-07 11.72535
## 1433G.syn 1.531810 -0.030298143 6.111187 1.052725e-09 9.977682e-07 11.69664
## 1433Z.syn 1.530272 -0.037679811 6.103304 1.105657e-09 9.977682e-07 11.65043
## CSN5.syn 1.516221 -0.012447993 6.045812 1.578450e-09 1.353205e-06 11.31517
## FAF1.syn 1.513511 -0.072829802 6.037095 1.665530e-09 1.359865e-06 11.26461
## GSLG1.syn -1.510924 0.026154310 -6.027699 1.764641e-09 1.375297e-06 11.21019
## PPME1.syn 1.505743 -0.045685001 6.006162 2.013989e-09 1.420040e-06 11.08578
## HNRPK.syn 1.504202 0.005252015 6.003692 2.044685e-09 1.420040e-06 11.07154
## pP55327.1 -1.505663 0.099833704 -6.001642 2.070513e-09 1.420040e-06 11.05973
## HGS.syn 1.502475 -0.026823754 5.992349 2.191644e-09 1.445305e-06 11.00622
## GUAA.syn 1.500551 -0.051628875 5.985309 2.287980e-09 1.452952e-06 10.96574
## SNX6.syn 1.498201 -0.033460567 5.975578 2.427940e-09 1.486766e-06 10.90986
## DP13A.syn 1.495068 -0.045357310 5.965390 2.583427e-09 1.527429e-06 10.85145
## DCTN4.syn 1.490574 -0.030042992 5.944619 2.931035e-09 1.647096e-06 10.73268
resGN.predictS = runGnetMulti(t(data), y=pheno$Group=="Sz", nfolds=3, bootstrap = T, alpha = 0.9)
## family is set to binomial
# hist(resGN.predictS$vals, main = "resGN.predictS") + title(sub = "alpha=0.9")
# hist(resGN.predictS$features[resGN.predictS$features > 0], main = "resGN_features") + title(sub="alpha=0.9")
choose featrues to label
# choose features to label
toLabel.predictS = rownames(data)[which(resGN.predictS$features>75 | abs(limres.predictS$t)>6)]
length(toLabel.predictS) #do not plot more than 30 labels, they will be subsampled
## [1] 33
scatter plot for feature selection
myggscatter(resGN.predictS$features, limres.predictS$t, line = F, label = rownames(data),
xlab="Number of models chosen in 100 elastic net regressions",
ylab="t-statistic value",
label.select = toLabel.predictS,
title="Focal variable: diagnosis (Sz-Control)",
subtitle = "models > 60 or abs(t) > 6")
## Warning in myggscatter(resGN.predictS$features, limres.predictS$t, line = F, :
## More than 30 labels were subsampled to 30
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
limma
spine.dens = as.matrix(spine.dens)
modSpine = model.matrix(~1+spine.dens[7,])
colnames(modSpine)[2] = "spineD"
limresSpine = fitLimma(protein, modSpine , "spineD - 0")
topTable(limresSpine, n=30)
## logFC AveExpr t P.Value adj.P.Val B
## pQ8IVF5.4 -0.8934745 -0.076267606 -5.353717 8.618617e-08 0.001477489 7.431116
## DPYL1.syn -0.8570338 -0.053946060 -5.135363 2.816563e-07 0.002414217 6.361533
## pQ92688 0.8425958 -0.038933652 5.048851 4.445602e-07 0.002540365 5.950076
## pQ9H706 0.8184724 -0.110583926 4.904303 9.377557e-07 0.004018987 5.278201
## OGA.syn -0.8040669 -0.029200734 -4.817985 1.450385e-06 0.004972789 4.886289
## PURA2.syn -0.7841306 -0.064650863 -4.698526 2.620829e-06 0.007488145 4.355392
## pQ9UQB3.1 -0.7648836 -0.049361315 -4.583198 4.579786e-06 0.008358399 3.855492
## SODM.syn 0.7638344 -0.028518932 4.576911 4.719532e-06 0.008358399 3.828599
## TCPA.syn -0.7627312 -0.062298015 -4.570300 4.870884e-06 0.008358399 3.800360
## VP26C.syn -0.7626967 -0.038831394 -4.570094 4.875692e-06 0.008358399 3.799478
## 1433Z.syn -0.7530230 -0.037679811 -4.512129 6.418801e-06 0.008679003 3.553627
## 1433S.syn -0.7514992 -0.083566757 -4.502998 6.700955e-06 0.008679003 3.515186
## CNDP2.syn -0.7510871 -0.069975733 -4.500529 6.779295e-06 0.008679003 3.504801
## TCPE.syn -0.7485420 -0.052082128 -4.485278 7.282757e-06 0.008679003 3.440806
## pP21359 0.7427877 0.009170296 4.450799 8.556154e-06 0.008679003 3.296917
## pP23528.3 0.7415122 0.040656564 4.443156 8.865912e-06 0.008679003 3.265173
## pQ96PR1.1 0.7406776 -0.049388739 4.438155 9.074386e-06 0.008679003 3.244430
## DCTN4.syn -0.7389771 -0.030042992 -4.427965 9.513712e-06 0.008679003 3.202241
## AGRB1.syn 0.7368582 0.008209034 4.415269 1.008960e-05 0.008679003 3.149807
## pQ6IQ19 0.7367035 -0.008968839 4.414342 1.013291e-05 0.008679003 3.145986
## 1433E.syn -0.7317216 -0.046139704 -4.384490 1.162705e-05 0.008679003 3.023323
## TCPQ.syn -0.7306619 -0.026529317 -4.378141 1.197091e-05 0.008679003 2.997341
## pP51608.5 0.7302414 0.023459948 4.375621 1.211006e-05 0.008679003 2.987038
## NCOA7.syn -0.7266821 -0.050324376 -4.354294 1.335109e-05 0.008679003 2.900091
## PTPA.syn -0.7255760 -0.049253269 -4.347666 1.376088e-05 0.008679003 2.873156
## TCPD.syn -0.7234032 -0.016869695 -4.334646 1.460101e-05 0.008679003 2.820367
## pO14525.1 0.7230085 0.012618937 4.332281 1.475878e-05 0.008679003 2.810794
## LRP1.syn 0.7229549 -0.023836803 4.331960 1.478031e-05 0.008679003 2.809496
## GSLG1.syn 0.7228363 0.026154310 4.331250 1.482810e-05 0.008679003 2.806621
## pP46821.87 0.7215738 0.054415971 4.323685 1.534607e-05 0.008679003 2.776044
run multi-elastic net
resGN.spine = runGnetMulti(t(protein), y=spine.dens[7,], nfolds=3, bootstrap = T, alpha = 0.9)
## family is set to gaussian
selection criteria
# classification AUCs
# hist(as.numeric(resGN.spine$vals), main = "resGN.spine")+title(sub="alpha=0.9")
# number of times features are used
# hist(resGN.spine$features[resGN.spine$features > 0], main = "resGN.spine_features") + title(sub="alpha=0.9")
choose features to label
toLabel.Spine = rownames(protein)[which(resGN.spine$features>60 | abs(limresSpine$t)>4.4)]
length(toLabel.Spine) #do not plot more than 30 labels, they will be subsampled
## [1] 31
scatter plot
myggscatter(resGN.spine$features, limresSpine$t, line = F, label = rownames(protein),
xlab="Number of models chosen in 100 elastic net regressions",
ylab="t-statistic value",
label.select = toLabel.Spine,
title="Focal variable: Avr.SpineS",
subtitle = "models > 60 or abs(t) > 4.4")
## Warning in myggscatter(resGN.spine$features, limresSpine$t, line = F, label =
## rownames(protein), : More than 30 labels were subsampled to 30
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
SzP = rownames(pheno[pheno$Group=='Sz',])
Ctrl = rownames(pheno[pheno$Group=='C',])
# marginal association for Avr.spineS in Sz and Controls
limresSz = fitLimma(protein[,SzP], modSpine[which(pheno$Group=="Sz"),], "spineD - 0")
limresCtrl = fitLimma(protein[,Ctrl], modSpine[which(pheno$Group=="C"),], "spineD - 0")
topTable(limresSz)
## logFC AveExpr t P.Value adj.P.Val B
## pP17096.4 1.0756418 0.28145853 4.268223 2.321618e-05 0.3979950 -3.265292
## pQ92688 1.0077131 -0.44406577 4.012478 6.843059e-05 0.5865528 -3.425685
## pQ6IQ19 0.9676038 -0.37448605 3.803591 1.586395e-04 0.6436434 -3.550098
## pQ9H706 0.9581040 -0.41943871 3.789843 1.674420e-04 0.6436434 -3.558076
## pQ969T4 0.9530234 -0.33394129 3.734906 2.074311e-04 0.6436434 -3.589693
## RL37A.syn 0.9376995 -0.18918192 3.713557 2.252733e-04 0.6436434 -3.601867
## pQ15057 0.8826156 -0.03376813 3.481205 5.389212e-04 0.9094734 -3.730232
## pQ6H8Q1.6 0.8825834 -0.15424995 3.480261 5.407822e-04 0.9094734 -3.730738
## pP23468.1 0.8644651 0.03465700 3.432091 6.441229e-04 0.9094734 -3.756390
## pO15027.2 0.8484686 -0.18462014 3.335929 9.076863e-04 0.9094734 -3.806612
topTable(limresCtrl)
## logFC AveExpr t P.Value adj.P.Val B
## PPM1E.syn -0.9458450 -0.34740985 -3.473051 0.0006771436 0.9999921 -3.961855
## pQ96IZ7 0.8914463 -0.07009488 3.386813 0.0009085221 0.9999921 -3.993812
## pQ15036.2 0.8970421 0.13330417 3.341981 0.0010562638 0.9999921 -4.010195
## pO75190.1 -0.8579486 -0.36272190 -3.267242 0.0013534502 0.9999921 -4.037149
## pQ9UQ35.87 0.8792725 -0.46504635 3.245957 0.0014513746 0.9999921 -4.044743
## HNRPC.syn -0.8435109 0.05558890 -3.217678 0.0015916982 0.9999921 -4.054774
## pQ5T200.4 0.8420023 -0.07732842 3.160574 0.0019143067 0.9999921 -4.074829
## pP19634.4 0.8300463 0.48413324 3.153196 0.0019601525 0.9999921 -4.077400
## pQ09470.2 0.8230990 0.34378664 3.129157 0.0021166955 0.9999921 -4.085746
## pO60241.1 -0.8342739 -0.30105360 -3.094255 0.0023646789 0.9999921 -4.097777
resGN.spineSz = runGnetMulti(t(protein[,SzP]), y=spine.dens[7,SzP], nfolds=3, bootstrap = T, alpha = 0.9)
## family is set to gaussian
resGN.spineCtrl = runGnetMulti(t(protein[,Ctrl]), y=spine.dens[7,Ctrl], nfolds=3, bootstrap = T, alpha = 0.9)
## family is set to gaussian
# hist(resGN.spineSz$features[resGN.spineSz$features > 0], main = "resGN.spineSz_features") + title(sub="alpha=0.9")
# hist(resGN.spineCtrl$features[resGN.spineCtrl$features > 0], main = "resGN.spineCtrl_features") + title(sub="alpha=0.9")
toLabel.SpineSz = rownames(protein)[which(resGN.spineSz$features>5 | abs(limresSz$t)>3.5)]
length(toLabel.SpineSz) #do not plot more than 30 labels, they will be subsampled
## [1] 11
toLabel.SpineCtrl = rownames(protein)[which(resGN.spineCtrl$features>5 | abs(limresCtrl$t)>3)]
length(toLabel.SpineCtrl) #do not plot more than 30 labels, they will be subsampled
## [1] 20
myggscatter(resGN.spineSz$features, limresSz$t, line = F, label = rownames(protein),
xlab="Number of models chosen in 100 elastic net regressions",
ylab="t-statistic value",
label.select = toLabel.SpineSz,
title="Focal variable: Avr.SpineS, Sz group", subtitle = "models > 5 or abs(t) > 3.5")
myggscatter(resGN.spineCtrl$features, limresCtrl$t, line = F, label = rownames(protein),
xlab="Number of models chosen in 100 elastic net regressions",
ylab="t-statistic value",
label.select = toLabel.SpineCtrl,
title="Focal variable: Avr.SpineS, Ctrl group", subtitle = "models > 5 or abs(t) > 3")
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
toLabel.SpineMarginal = unique(c(toLabel.SpineSz, toLabel.SpineCtrl))
# limma: nothing with FDR < 0.05
modSpineInt=model.matrix(~ as.numeric(pheno$Group=='Sz') + spine.dens[1,] + as.numeric(pheno$Group=='Sz'):spine.dens[1,])
colnames(modSpineInt)[4] = "SzSpine"
limresProtSpineInteraction=fitLimma(protein, modSpineInt , "SzSpine - 0")
hist(limresProtSpineInteraction$p.value)
topTable(limresProtSpineInteraction)
## logFC AveExpr t P.Value adj.P.Val B
## pO14976 1.0941124 -0.11475539 3.533019 0.0004202093 0.999942 -4.058568
## pQ9C0D0.1 -1.0546524 0.08819297 -3.418221 0.0006425060 0.999942 -4.095722
## pQ9H307.2 -0.9811641 -0.02608153 -3.160182 0.0016004114 0.999942 -4.175194
## RAE2 0.9708722 -0.03278399 3.133518 0.0017524109 0.999942 -4.183047
## pQ9UKM9.2 0.9545776 0.04792047 3.080798 0.0020927219 0.999942 -4.198381
## pQ5T200.4 -0.9244881 0.09490526 -2.989077 0.0028324904 0.999942 -4.224451
## pQ96AG3.1 -0.9133800 0.14983950 -2.943205 0.0032859246 0.999942 -4.237198
## VAMP3.syn -0.9053292 0.02395728 -2.928646 0.0034424237 0.999942 -4.241177
## pQ15036.2 -0.8944883 0.09819100 -2.890277 0.0038907264 0.999942 -4.251665
## MADD.syn 0.8904018 -0.01244468 2.885051 0.0039539454 0.999942 -4.253026
graphSampling <- function(protein, LV, spine, prot.use, pheno = NULL, spinePredict = NULL, n = 20, lambda1 = 0.1) {
graph_list = list()
for (i in 1:n) {
prot.sub = unique(sample(prot.use, replace=T))
nt.data = cbind(t(protein[prot.sub,]), t(LV), "Spine" = spine)
if(!is.null(pheno)) {
nt.data = cbind(nt.data, "Sz" = pheno)
}
if (!is.null(spinePredict)) {
nt.data = cbind(nt.data, t(spinePredict))
}
ntres = notearsInterceptMultiLoss(nt.data, lambda1 = lambda1)
graph = ntres$graph
graph_list[[i]]=graph
}
graph_list
}
graphConstruction <- function (graph_list, name, thres = 0.2, n = 20, n_cutoff = 10) {
l = length(name)
newgraph = matrix(data=c(0.0), nrow = l, ncol = l)
rownames(newgraph) = name
colnames(newgraph) = rownames(newgraph)
for (i in 1:l) {
for (j in 1:l) {
cnt = 0;
sum = 0.0;
row_name=name[i]
col_name=name[j]
for (k in 1:n) {
namek = rownames(graph_list[[k]])
if (row_name %in% namek & col_name %in% namek) {
if(abs(graph_list[[k]][row_name, col_name]) >= thres) {
cnt = cnt+1;
sum = sum + graph_list[[k]][row_name, col_name]
}
}
}
if (cnt >= n_cutoff) {
newgraph[i,j] = 1.0*sum/cnt
}
}
}
newgraph
}
Nodes
prot.use = toLabel
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]
Sample defalut n=20
SampleDx_Predict = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict))
SampleDx_NoSz_Predict = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
spinePredict = tscale(spinePredict))
SampleDx = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
pheno = pheno$Group=="Sz")
SampleDx_NoSz = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,)
look at top 20/30/50 edge cutoff
for (i in 1:20) {
sorted = sort(abs(SampleDx_NoSz[[i]]), T)
print(c(sorted[21], sorted[31], sorted[51]))
}
## [1] 0.3032011 0.2268166 0.1232914
## [1] 0.3266034 0.2183912 0.1374592
## [1] 0.2825062 0.2303501 0.1264078
## [1] 0.3343950 0.2158181 0.1257517
## [1] 0.3186735 0.1788678 0.1077261
## [1] 0.3351227 0.2354051 0.1430549
## [1] 0.2441629 0.1886909 0.1027224
## [1] 0.3399627 0.2510639 0.1311407
## [1] 0.2935994 0.2126151 0.1065618
## [1] 0.3243118 0.2506505 0.1532260
## [1] 0.3172415 0.2028005 0.1156485
## [1] 0.27082708 0.21342237 0.09029881
## [1] 0.3327649 0.2464871 0.1104643
## [1] 0.34912482 0.19763875 0.07772649
## [1] 0.3015028 0.2631653 0.1516571
## [1] 0.35446586 0.22910402 0.09652205
## [1] 0.2565801 0.2006097 0.1055853
## [1] 0.3330527 0.2187216 0.1304274
## [1] 0.29297826 0.19356645 0.08885504
## [1] 0.3509489 0.2311544 0.1365131
Construct new graph with default threshold=0.2
name = c(prot.use, lv.use, "Spine", "Sz", rownames(spinePredict))
GraphDx_Predict = graphConstruction(graph_list = SampleDx_Predict, name = name)
Szgroup = length
name = c(prot.use, lv.use, "Spine", rownames(spinePredict))
GraphDx_NoSz_Predict = graphConstruction(graph_list = SampleDx_NoSz_Predict, name = name)
name = c(prot.use, lv.use, "Spine", "Sz")
GraphDx = graphConstruction(graph_list = SampleDx, name = name)
name = c(prot.use, lv.use, "Spine")
GraphDx_NoSz = graphConstruction(graph_list = SampleDx_NoSz, name = name)
Visualize
# graph varType
SYN=length(grep(".syn", prot.use))
Phos=length(grep("p", prot.use))
HOM=length(prot.use)-SYN-Phos
Prot = c(rep("Phos", Phos), rep("Hom", HOM), rep("Syn", SYN))
lenLV = rep("LV",length(lv.use))
Focal variable: diagnosis
gvarType = c(Prot, lenLV, "Spine", "Sz", rep("PredictSpine",3))
network_visualize(GraphDx_Predict, gvarType)
gvarType = c(Prot, lenLV, "Spine", rep("PredictSpine",3))
network_visualize(GraphDx_NoSz_Predict, gvarType)
gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphDx, gvarType)
gvarType = c(Prot, lenLV, "Spine")
network_visualize(GraphDx_NoSz, gvarType)
Nodes
prot.use = toLabel.Spine
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]
Sample defalut n=20
SampleSpine_Predict = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict))
SampleSpine_NoSz_Predict = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
spinePredict = tscale(spinePredict))
SampleSpine = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
pheno = pheno$Group=="Sz")
SampleSpine_NoSz = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use)
look at top 20/30/50 edge cutoff
for (i in 1:20) {
sorted = sort(abs(SampleSpine_NoSz[[i]]), T)
print(c(sorted[21], sorted[31], sorted[51]))
}
## [1] 0.2530889 0.1907942 0.1145262
## [1] 0.2794521 0.2120853 0.1472086
## [1] 0.2312279 0.1662027 0.1150866
## [1] 0.2405788 0.1947410 0.1094901
## [1] 0.2825583 0.1963016 0.1136881
## [1] 0.26486286 0.17420982 0.09494001
## [1] 0.2472721 0.1916586 0.1171055
## [1] 0.2743022 0.2069258 0.1310184
## [1] 0.24072757 0.19809292 0.09993045
## [1] 0.23020913 0.17054136 0.09590431
## [1] 0.2698287 0.2279909 0.1359897
## [1] 0.2604606 0.1947551 0.1075155
## [1] 0.2711087 0.2143835 0.1029340
## [1] 0.2666014 0.2093371 0.1431590
## [1] 0.2390029 0.1861309 0.1050572
## [1] 0.2735049 0.2029109 0.1201743
## [1] 0.23541449 0.16127586 0.09253285
## [1] 0.2570886 0.1983001 0.1230010
## [1] 0.2577424 0.2255661 0.1581288
## [1] 0.2575156 0.1980991 0.1197271
Construct new graph with default threshold=0.2
name = c(prot.use, lv.use, "Spine", "Sz", rownames(spinePredict))
GraphSpine_Predict = graphConstruction(graph_list = SampleSpine_Predict, name = name)
name = c(prot.use, lv.use, "Spine", rownames(spinePredict))
GraphSpine_NoSz_Predict = graphConstruction(graph_list = SampleSpine_NoSz_Predict, name = name)
name = c(prot.use, lv.use, "Spine", "Sz")
GraphSpine = graphConstruction(graph_list = SampleSpine, name = name)
name = c(prot.use, lv.use, "Spine")
GraphSpine_NoSz = graphConstruction(graph_list = SampleSpine_NoSz, name = name)
Visualize
# graph varType
SYN=length(grep(".syn", prot.use))
Phos=length(grep("p", prot.use))
HOM=length(prot.use)-SYN-Phos
Prot = c(rep("Phos", Phos), rep("Hom", HOM), rep("Syn", SYN))
lenLV = rep("LV",length(lv.use))
focal variable: avg.spineS
gvarType = c(Prot, lenLV, "Spine", "Sz", rep("PredictSpine",3))
network_visualize(GraphSpine_Predict, gvarType)
gvarType = c(Prot, lenLV, "Spine", rep("PredictSpine",3))
network_visualize(GraphSpine_NoSz_Predict, gvarType)
gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphSpine, gvarType)
gvarType = c(Prot, lenLV, "Spine")
network_visualize(GraphSpine_NoSz, gvarType)
Nodes
prot.use = toLabel.SpineMarginal
lv.use = colnames(res$Z)[which(colSums(res$Z[prot.use,])>0)]
Sample defalut n=20
SampleSpineM_Predict = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
pheno = pheno$Group=="Sz", spinePredict = tscale(spinePredict))
SampleSpineM_NoSz_Predict = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
spinePredict = tscale(spinePredict))
SampleSpineM = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use,
pheno = pheno$Group=="Sz")
SampleSpineM_NoSz = graphSampling(protein = tscale(protein), LV = tscale(res$B[lv.use,]), spine = tscale(spine.dens)[7,], prot.use = prot.use)
look at top 20/30/50 edge cutoff
for (i in 1:20) {
sorted = sort(abs(SampleSpineM_Predict[[i]]), T)
print(c(sorted[21], sorted[31], sorted[51]))
}
## [1] 0.2710647 0.2166053 0.1507901
## [1] 0.2671103 0.2329998 0.1625837
## [1] 0.2907356 0.2266787 0.1432652
## [1] 0.2801489 0.2179566 0.1444579
## [1] 0.2937074 0.2256596 0.1734228
## [1] 0.2685831 0.2062389 0.1389371
## [1] 0.2595864 0.2258823 0.1768834
## [1] 0.2650412 0.2156483 0.1381464
## [1] 0.2652130 0.2031971 0.1466063
## [1] 0.2656629 0.2225641 0.1589334
## [1] 0.2925721 0.2398683 0.1612636
## [1] 0.2606099 0.2054587 0.1529708
## [1] 0.2883908 0.2121162 0.1383166
## [1] 0.2740144 0.2300112 0.1457400
## [1] 0.3019276 0.2534426 0.1795049
## [1] 0.2928640 0.2381717 0.1733600
## [1] 0.2584094 0.2106583 0.1463333
## [1] 0.2575325 0.2315021 0.1478743
## [1] 0.2556917 0.2059857 0.1361789
## [1] 0.2720559 0.2399823 0.1721586
Construct new graph with default threshold=0.2
name = c(prot.use, lv.use, "Spine", "Sz", rownames(spinePredict))
GraphSpineM_Predict = graphConstruction(graph_list = SampleSpineM_Predict, name = name)
name = c(prot.use, lv.use, "Spine", rownames(spinePredict))
GraphSpineM_NoSz_Predict = graphConstruction(graph_list = SampleSpineM_NoSz_Predict, name = name)
name = c(prot.use, lv.use, "Spine", "Sz")
GraphSpineM = graphConstruction(graph_list = SampleSpineM, name = name)
name = c(prot.use, lv.use, "Spine")
GraphSpineM_NoSz = graphConstruction(graph_list = SampleSpineM_NoSz, name = name)
Visualize
# graph varType
Prot = c(rep("spineSz", length(toLabel.SpineSz)), rep("SpineCtrl", length(toLabel.SpineCtrl)))
lenLV = rep("LV",length(lv.use))
focal variable: avg.spineS, in Sz or Ctrl group
gvarType = c(Prot, lenLV, "Spine", "Sz", rep("PredictSpine",3))
network_visualize(GraphSpineM_Predict, gvarType)
gvarType = c(Prot, lenLV, "Spine", rep("PredictSpine",3))
network_visualize(GraphSpineM_NoSz_Predict, gvarType)
gvarType = c(Prot, lenLV, "Spine", "Sz")
network_visualize(GraphSpineM, gvarType)
gvarType = c(Prot, lenLV, "Spine")
network_visualize(GraphSpineM_NoSz, gvarType)
t_res=tscale(res$B)
LV_pH = data.frame()
for (i in 1:nrow(t_res)) {
cor=cor.test(t_res[i,], pheno$pH)
a=t(melt(unlist(cor)))
LV_pH = rbind(LV_pH, a)
}
rownames(LV_pH)=rownames(t_res)
LV_pH=as.data.frame(LV_pH)
LV_pH$p.value=as.numeric(LV_pH$p.value)
LV_pH$estimate.cor=as.numeric(LV_pH$estimate.cor)
ggscatter(LV_pH, "estimate.cor", "p.value", label = rownames(LV_pH), repel = T,
xlim = c(-0.5, 0.5),
ylab = "log10 scaled p-val",
xlab = "Pearson correlation") +
scale_y_log10(breaks = c(1e-4, 1e-3, 1e-2, 0.05, 0.1, 0.25, 0.5)) +
labs(title = "Correlation between LVs and pH")